- 1 Tabulate a sum list of all RNA and smallRNA counts by cell line/treatment type
- 2 Add MIMAT ID to small RNAseq data
- 3 Beginning with sensitive cell lines, creating a count table of miRNA, mRNA, and hybrids
- 4 Create a generalized match list that will merge all datasets eventually
- 5 Count hybrids, add to match list
- 6 Count small RNAseq data, add to match list
- 7 Count total RNAseq data, add to match list
- 8 Repeat the above steps, using resistant cell line data
- 9 Create a generalized match list that will merge all datasets eventually
- 10 Count hybrids, add to match list
- 11 Count small RNAseq data, add to match list
- 12 Count total RNAseq data, add to match list
- 13 Merge all dataframes to create a singular count table of all cell lines and treatments
- 14 Create a count of whether hybrids are seen in triplicates, duplicates, or as unique hybrids
- 15 Create a 3D Representation of the log Value of Counts
hyb_sensitive <- readRDS(here::here("initial_data/hyb_sensitive.rds"))
hyb_resistant <- readRDS(here::here("initial_data/hyb_resistant.rds"))
totalRNA_count <- readRDS(here::here("initial_data/totalRNA_count.rds"))
smallRNA_count <- readRDS(here::here("initial_data/smallRNA_count.rds"))
totalRNAseq <- readRDS(here::here("initial_data/total_RNAseq_all_annot_results.rds"))
DEGs_smallRNAseq <- readRDS(here::here("initial_data/DEGs_smallRNAseq.rds"))
Tabulate a sum list of all RNA and smallRNA counts by cell line/treatment type
totalRNA_count$IPC_sum <- rowSums(totalRNA_count[ , c(3:5,21:23)], na.rm=TRUE)
totalRNA_count$IPC_R_sum <- rowSums(totalRNA_count[ , c(6:8, 24:26)], na.rm=TRUE)
totalRNA_count$MJ_sum <- rowSums(totalRNA_count[ , c(9:11,27:29)], na.rm=TRUE)
totalRNA_count$MJ_R_sum <- rowSums(totalRNA_count[ , c(12:14, 30:32)], na.rm=TRUE)
totalRNA_count$SK_sum <- rowSums(totalRNA_count[ , c(15:17, 33:35)], na.rm=TRUE)
totalRNA_count$SK_R_sum <- rowSums(totalRNA_count[ , c(18:20,36:38)], na.rm=TRUE)
totalRNA_sum <- totalRNA_count[,c(1,2,39:44)]
names(totalRNA_sum)[names(totalRNA_sum) == 'gene_name'] <- 'mRNA'
#saveRDS(totalRNA_sum, "totalRNA_sum.RDS")
smallRNA_count$IPC_sum <- rowSums(smallRNA_count[ , c(3:5)], na.rm=TRUE)
smallRNA_count$IPC_R_sum <- rowSums(smallRNA_count[ , c(6:8)], na.rm=TRUE)
smallRNA_count$MJ_sum <- rowSums(smallRNA_count[ , c(9:11)], na.rm=TRUE)
smallRNA_count$MJ_R_sum <- rowSums(smallRNA_count[ , c(12:14)], na.rm=TRUE)
smallRNA_count$SK_sum <- rowSums(smallRNA_count[ , c(15:17)], na.rm=TRUE)
smallRNA_count$SK_R_sum <- rowSums(smallRNA_count[ , c(18:20)], na.rm=TRUE)
smallRNA_sum <- smallRNA_count[, c(1,20:25)]
#saveRDS(smallRNA_sum, "smallRNA_sum.RDS")
miRNANames = smallRNA_sum$Molecule
smallRNA_sum2 <- miRNA_NameToAccession(miRNANames,version = "v18")
names(smallRNA_sum2)[names(smallRNA_sum2) == 'miRNAName_v18'] <- 'Molecule'
smallRNA_sum <- merge(smallRNA_sum, smallRNA_sum2, by = "Molecule")
names(smallRNA_sum)[names(smallRNA_sum) == 'Accession'] <- 'MIMAT'
#smallRNA_sum <- smallRNA_sum[, c(1, 20, 2:19)]
Add MIMAT ID to small RNAseq data
DEGs_smallRNAseq$gene_name = paste('hsa', DEGs_smallRNAseq$gene_name, sep='-')
miRNANames = DEGs_smallRNAseq$gene_name
DEGs_smallRNAseq2 <- miRNA_NameToAccession(miRNANames,version = "v18")
names(DEGs_smallRNAseq2)[names(DEGs_smallRNAseq2) == 'miRNAName_v18'] <- 'gene_name'
DEGs_smallRNAseq2 <- distinct(DEGs_smallRNAseq2, gene_name, .keep_all = TRUE)
DEGs_smallRNAseq <- merge(DEGs_smallRNAseq, DEGs_smallRNAseq2, by = "gene_name")
names(DEGs_smallRNAseq)[names(DEGs_smallRNAseq) == 'Accession'] <- 'MIMAT'
#saveRDS(DEGs_smallRNAseq, "MIMAT_smallRNA_DEGs.rds")
Beginning with sensitive cell lines, creating a count table of miRNA, mRNA, and hybrids
Create a generalized match list that will merge all datasets eventually
ipc_hybrids <- hyb_sensitive %>% filter(grepl("IPC", Sample))
sk_hybrids <- hyb_sensitive %>% filter(grepl("SK", Sample))
mj_hybrids <- hyb_sensitive %>% filter(grepl("MJ", Sample))
ipc_match_list <- select(ipc_hybrids, miRNA, MIMAT, mRNA, hybrids)
sk_match_list <- select(sk_hybrids, miRNA, MIMAT, mRNA, hybrids)
mj_match_list <- select(mj_hybrids, miRNA, MIMAT, mRNA, hybrids)
ipc_match_list <- distinct(ipc_match_list, hybrids, .keep_all = TRUE)
sk_match_list <- distinct(sk_match_list, hybrids, .keep_all = TRUE)
mj_match_list <- distinct(mj_match_list, hybrids, .keep_all = TRUE)
Count hybrids, add to match list
IPC_hyb <- ipc_hybrids %>%
group_by(hybrids) %>%
tally(sort = TRUE)
ipc_match_list <- merge(ipc_match_list, IPC_hyb, by = 'hybrids', .keep_all = TRUE)
SK_hyb <- sk_hybrids %>%
group_by(hybrids) %>%
tally(sort = TRUE)
sk_match_list <- merge(sk_match_list, SK_hyb, by = "hybrids", .keep_all = TRUE)
MJ_hyb <- mj_hybrids %>%
group_by(hybrids) %>%
tally(sort = TRUE)
mj_match_list <- merge(mj_match_list, MJ_hyb, by = "hybrids", .keep_all = TRUE)
Count small RNAseq data, add to match list
IPC_smallRNA <- smallRNA_sum[, c(2,8)]
ipc_match_list <- merge(ipc_match_list, IPC_smallRNA, by = 'MIMAT', .keep_all = TRUE)
names(ipc_match_list)[names(ipc_match_list) == 'IPC_sum'] <- 'miRNA_count'
names(ipc_match_list)[names(ipc_match_list) == 'n'] <- 'hybrid_count'
sk_smallRNA <- smallRNA_sum[, c(6,8)]
sk_match_list <- merge(sk_match_list, sk_smallRNA, by = 'MIMAT', .keep_all = TRUE)
names(sk_match_list)[names(sk_match_list) == 'SK_sum'] <- 'miRNA_count'
names(sk_match_list)[names(sk_match_list) == 'n'] <- 'hybrid_count'
mj_smallRNA <- smallRNA_sum[, c(4,8)]
mj_match_list <- merge(mj_match_list, mj_smallRNA, by = 'MIMAT', .keep_all = TRUE)
names(mj_match_list)[names(mj_match_list) == 'MJ_sum'] <- 'miRNA_count'
names(mj_match_list)[names(mj_match_list) == 'n'] <- 'hybrid_count'
Count total RNAseq data, add to match list
IPC_totalRNA <- totalRNA_sum[, c(1,2,3)]
ipc_match_list <- merge(ipc_match_list, IPC_totalRNA, by = "mRNA", .keep_all = TRUE)
names(ipc_match_list)[names(ipc_match_list) == 'IPC_sum'] <- 'mRNA_count'
ipc_match_list$cell_line <- "IPC"
ipc_match_list$treatment <- "sensitive"
ipc_match_list <- ipc_match_list[, c(9,10,3,1,7,4,2,5,8,6)]
#saveRDS(ipc_match_list, "IPC_data.RDS")
sk_totalRNA <- totalRNA_sum[, c(1,2,7)]
sk_match_list <- merge(sk_match_list, sk_totalRNA, by = "mRNA", .keep_all = TRUE)
names(sk_match_list)[names(sk_match_list) == 'SK_sum'] <- 'mRNA_count'
sk_match_list$cell_line <- "SK"
sk_match_list$treatment <- "sensitive"
sk_match_list <- sk_match_list[, c(9,10,3,1,7,4,2,5,8,6)]
#saveRDS(sk_match_list, "SK_data.RDS")
mj_totalRNA <- totalRNA_sum[, c(1,2,5)]
mj_match_list <- merge(mj_match_list, mj_totalRNA, by = "mRNA", .keep_all = TRUE)
names(mj_match_list)[names(mj_match_list) == 'MJ_sum'] <- 'mRNA_count'
mj_match_list$cell_line <- "MJ"
mj_match_list$treatment <- "sensitive"
mj_match_list <- mj_match_list[, c(9,10,3,1,7,4,2,5,8,6)]
#saveRDS(mj_match_list, "MJ_data.RDS")
Repeat the above steps, using resistant cell line data
Create a generalized match list that will merge all datasets eventually
ipc_R_hybrids <- hyb_resistant %>% filter(grepl("IPC", Sample))
sk_R_hybrids <- hyb_resistant %>% filter(grepl("SK", Sample))
ipc_R_match_list <- select(ipc_R_hybrids, miRNA, MIMAT, mRNA, hybrids)
sk_R_match_list <- select(sk_R_hybrids, miRNA, MIMAT, mRNA, hybrids)
ipc_R_match_list <- distinct(ipc_R_match_list, hybrids, .keep_all = TRUE)
sk_R_match_list <- distinct(sk_R_match_list, hybrids, .keep_all = TRUE)
Count hybrids, add to match list
IPC_R_hyb <- ipc_R_hybrids %>%
group_by(hybrids) %>%
tally(sort = TRUE)
SK_R_hyb <- sk_R_hybrids %>%
group_by(hybrids) %>%
tally(sort = TRUE)
ipc_R_match_list <- merge(ipc_R_match_list, IPC_R_hyb, by = 'hybrids', .keep_all = TRUE)
sk_R_match_list <- merge(sk_R_match_list, SK_R_hyb, by = "hybrids", .keep_all = TRUE)
Count small RNAseq data, add to match list
IPC_R_smallRNA <- smallRNA_sum[, c(3,8)]
ipc_R_match_list <- merge(ipc_R_match_list, IPC_R_smallRNA, by = 'MIMAT', .keep_all = TRUE)
names(ipc_R_match_list)[names(ipc_R_match_list) == 'IPC_R_sum'] <- 'miRNA_count'
names(ipc_R_match_list)[names(ipc_R_match_list) == 'n'] <- 'hybrid_count'
#saveRDS(ipc_R_match_list, "IPC_R_data.RDS")
sk_R_smallRNA <- smallRNA_sum[, c(7,8)]
sk_R_match_list <- merge(sk_R_match_list, sk_R_smallRNA, by = 'MIMAT', .keep_all = TRUE)
names(sk_R_match_list)[names(sk_R_match_list) == 'SK_R_sum'] <- 'miRNA_count'
names(sk_R_match_list)[names(sk_R_match_list) == 'n'] <- 'hybrid_count'
#saveRDS(sk_R_match_list, "SK_R_data.RDS")
Count total RNAseq data, add to match list
IPC_R_totalRNA <- totalRNA_sum[, c(1,2,4)]
ipc_R_match_list <- merge(ipc_R_match_list, IPC_R_totalRNA, by = "mRNA", .keep_all = TRUE)
names(ipc_R_match_list)[names(ipc_R_match_list) == 'IPC_R_sum'] <- 'mRNA_count'
ipc_R_match_list$cell_line <- "IPC_R"
ipc_R_match_list$treatment <- "resistant"
ipc_R_match_list <- ipc_R_match_list[, c(9,10,3,1,7,4,2,5,8,6)]
#saveRDS(ipc_R_match_list, "IPC_R_data.RDS")
sk_R_totalRNA <- totalRNA_sum[, c(1,2,8)]
sk_R_match_list <- merge(sk_R_match_list, sk_R_totalRNA, by = "mRNA", .keep_all = TRUE)
names(sk_R_match_list)[names(sk_R_match_list) == 'SK_R_sum'] <- 'mRNA_count'
sk_R_match_list$cell_line <- "SK_R"
sk_R_match_list$treatment <- "resistant"
sk_R_match_list <- sk_R_match_list[, c(9,10,3,1,7,4,2,5,8,6)]
#saveRDS(sk_R_match_list, "SK_R_data.RDS")
Merge all dataframes to create a singular count table of all cell lines and treatments
#SK_R_data <- readRDS("SK_R_data.rds")
#SK_data <- readRDS("SK_data.rds")
#IPC_R_data <- readRDS("IPC_R_data.rds")
#IPC_data <- readRDS("IPC_data.rds")
#MJ_data <- readRDS("MJ_data.RDS")
#count_data <- rbind(SK_R_data, SK_data, IPC_R_data, IPC_data, MJ_data)
count_data <- rbind(sk_R_match_list, sk_match_list, ipc_R_match_list, ipc_match_list, mj_match_list)
#saveRDS(count_data, "total_count_data.rds")
Create a count of whether hybrids are seen in triplicates, duplicates, or as unique hybrids
qCLASH <- bind_rows(readRDS(here::here("initial_data/hyb_sensitive.rds")) %>%
mutate(condition="sensitive") ,
readRDS(here::here("initial_data/hyb_resistant.rds")) %>%
mutate(condition="resistant"))
counts_replicates <- qCLASH %>%
group_by(condition, cell_line, hybrids) %>%
summarise(replicates = list(Sample %>% unique()), miRNA, mRNA ) %>%
mutate(total = map_int( replicates, length)) %>%
distinct() %>%
mutate(in_triplicates = factor(ifelse(total==3, "Triplicates",
ifelse(total==2, "Duplicates", "Unique")),
levels = c("Unique", "Duplicates", "Triplicates"))) %>%
arrange(condition, cell_line, hybrids, in_triplicates) %>%
ungroup() %>%
right_join(count_data, by=c("cell_line", "hybrids", "mRNA", "miRNA"))
`summarise()` has grouped output by 'condition', 'cell_line', 'hybrids'. You can override using the `.groups` argument.
Create a 3D Representation of the log Value of Counts
fig <- plot_ly(count_data, x = ~log(mRNA_count), y = ~log(hybrid_count), z = ~log(miRNA_count), color = ~cell_line, colors = c('dodgerblue4', 'darkslategray4','darkgreen', 'firebrick', 'darksalmon'))
fig <- fig %>% add_markers()
fig <- fig %>% layout(scene = list(xaxis = list(title = 'Log(mRNA Count)'),
yaxis = list(title = 'Log(Hybrid Count)'),
zaxis = list(title = 'Log(miRNA Count)')),
title='Correlation Analysis')
fig
htmlwidgets::saveWidget(plotly::as_widget(fig), "./results/qCLASH_tot_RNAseq_spatial_representation.html")
p <- counts_replicates %>%
plotly::plot_ly(x=~log(mRNA_count), y=~log(hybrid_count), z=~log(miRNA_count), size=1, color=~in_triplicates, text=~hybrids, type="scatter3d", mode="markers")
p
htmlwidgets::saveWidget(plotly::as_widget(p), "./results/qCLASH_tot_RNAseq_triplicates.html")
---
title: "Data Counts and Visualization"
author: "Vincent Gureghian, Anthoula Gaigneaux, Hailee Herbst"
date: "4/4/2022"
output:
  html_notebook:
    highlight: default
    number_sections: yes
    theme: united
    toc: yes
    toc_depth: 2
    code_folding: hide
  pdf_document:
    fig_caption: yes
    fig_crop: no
    number_sections: yes
    toc: yes
  html_document:
    toc: yes
    df_print: paged
---



```{r setup, include=FALSE}
library(tidyverse)
library(dplyr)
library(plotly)
library(ggpubr)
library(rgl)
library(ggplot2)
library(plot3D)
library(reshape2)
library(metR)
library(miRBaseConverter)
```


```{r}
# Read the six serialized input objects from initial_data/.
# here::here() resolves each path from the project root.
hyb_sensitive <- here::here("initial_data/hyb_sensitive.rds") %>%
  readRDS()
hyb_resistant <- here::here("initial_data/hyb_resistant.rds") %>%
  readRDS()
totalRNA_count <- here::here("initial_data/totalRNA_count.rds") %>%
  readRDS()
smallRNA_count <- here::here("initial_data/smallRNA_count.rds") %>%
  readRDS()
totalRNAseq <- here::here("initial_data/total_RNAseq_all_annot_results.rds") %>%
  readRDS()
DEGs_smallRNAseq <- here::here("initial_data/DEGs_smallRNAseq.rds") %>%
  readRDS()
```


# Tabulate a sum list of all RNA and smallRNA counts by cell line/treatment type
```{r}
# Names of the per-cell-line / per-treatment sum columns created below.
sum_cols <- c("IPC_sum", "IPC_R_sum", "MJ_sum", "MJ_R_sum", "SK_sum", "SK_R_sum")

# Total RNA-seq ----
# Replicate columns are addressed by position (up to column 38). On a narrower
# table the later ranges would pick up the sum columns appended here, so fail
# loudly instead of producing inflated sums.
# NOTE(review): confirm the positions match the sample order of totalRNA_count.
if (ncol(totalRNA_count) < 38) {
  stop("totalRNA_count has fewer than 38 columns; the hard-coded replicate ",
       "column ranges do not match this table.", call. = FALSE)
}
totalRNA_count$IPC_sum <- rowSums(totalRNA_count[, c(3:5, 21:23)], na.rm = TRUE)
totalRNA_count$IPC_R_sum <- rowSums(totalRNA_count[, c(6:8, 24:26)], na.rm = TRUE)
totalRNA_count$MJ_sum <- rowSums(totalRNA_count[, c(9:11, 27:29)], na.rm = TRUE)
totalRNA_count$MJ_R_sum <- rowSums(totalRNA_count[, c(12:14, 30:32)], na.rm = TRUE)
totalRNA_count$SK_sum <- rowSums(totalRNA_count[, c(15:17, 33:35)], na.rm = TRUE)
totalRNA_count$SK_R_sum <- rowSums(totalRNA_count[, c(18:20, 36:38)], na.rm = TRUE)

# Keep the two leading identifier columns plus the six sums. The sums are
# selected by name so the result does not depend on the width of the raw table.
totalRNA_sum <- totalRNA_count[, c(names(totalRNA_count)[1:2], sum_cols)]
names(totalRNA_sum)[names(totalRNA_sum) == "gene_name"] <- "mRNA"
#saveRDS(totalRNA_sum, "totalRNA_sum.RDS")

# Small RNA-seq ----
# The replicate ranges below reach column 20, so the raw table must have at
# least 20 columns; with 19, `18:20` would include the freshly added IPC_sum.
# NOTE(review): the previous positional selection `c(1, 20:25)` only matched
# the sum columns for a 19-column table, which contradicts these ranges --
# check names(smallRNA_count) and correct the ranges if they are shifted.
if (ncol(smallRNA_count) < 20) {
  stop("smallRNA_count has fewer than 20 columns; the hard-coded replicate ",
       "column ranges do not match this table.", call. = FALSE)
}
smallRNA_count$IPC_sum <- rowSums(smallRNA_count[, c(3:5)], na.rm = TRUE)
smallRNA_count$IPC_R_sum <- rowSums(smallRNA_count[, c(6:8)], na.rm = TRUE)
smallRNA_count$MJ_sum <- rowSums(smallRNA_count[, c(9:11)], na.rm = TRUE)
smallRNA_count$MJ_R_sum <- rowSums(smallRNA_count[, c(12:14)], na.rm = TRUE)
smallRNA_count$SK_sum <- rowSums(smallRNA_count[, c(15:17)], na.rm = TRUE)
smallRNA_count$SK_R_sum <- rowSums(smallRNA_count[, c(18:20)], na.rm = TRUE)

# First column (the miRNA name, used as `Molecule` below) plus the six sums.
smallRNA_sum <- smallRNA_count[, c(names(smallRNA_count)[1], sum_cols)]
#saveRDS(smallRNA_sum, "smallRNA_sum.RDS")

# Attach the accession for every miRNA name. miRNA_NameToAccession() returns
# the columns `miRNAName_v18` and `Accession`, renamed here to `Molecule`
# (merge key) and `MIMAT`.
miRNANames <- smallRNA_sum$Molecule
smallRNA_sum2 <- miRNA_NameToAccession(miRNANames, version = "v18")
names(smallRNA_sum2)[names(smallRNA_sum2) == "miRNAName_v18"] <- "Molecule"
smallRNA_sum <- merge(smallRNA_sum, smallRNA_sum2, by = "Molecule")
names(smallRNA_sum)[names(smallRNA_sum) == "Accession"] <- "MIMAT"

```


# Add MIMAT ID to small RNAseq data
```{r}
# Prefix every DEG miRNA name with "hsa-", look the names up with
# miRNA_NameToAccession(), and attach the returned accession as `MIMAT`.
DEGs_smallRNAseq[["gene_name"]] <- paste(
  "hsa", DEGs_smallRNAseq[["gene_name"]],
  sep = "-"
)
miRNANames <- DEGs_smallRNAseq[["gene_name"]]

DEGs_smallRNAseq2 <- miRNA_NameToAccession(miRNANames, version = "v18")
colnames(DEGs_smallRNAseq2)[
  colnames(DEGs_smallRNAseq2) == "miRNAName_v18"
] <- "gene_name"

# One lookup row per miRNA name, so the merge cannot multiply DEG rows.
DEGs_smallRNAseq2 <- DEGs_smallRNAseq2 %>%
  distinct(gene_name, .keep_all = TRUE)
DEGs_smallRNAseq <- merge(DEGs_smallRNAseq, DEGs_smallRNAseq2, by = "gene_name")

colnames(DEGs_smallRNAseq)[colnames(DEGs_smallRNAseq) == "Accession"] <- "MIMAT"
#saveRDS(DEGs_smallRNAseq, "MIMAT_smallRNA_DEGs.rds")
```


# Beginning with sensitive cell lines, creating a count table of miRNA, mRNA, and hybrids

# Create a generalized match list that will merge all datasets eventually
```{r}
# Split the sensitive-line hybrids by cell line; rows are assigned to a line
# when its label occurs in the `Sample` value.
ipc_hybrids <- filter(hyb_sensitive, grepl("IPC", Sample))
sk_hybrids <- filter(hyb_sensitive, grepl("SK", Sample))
mj_hybrids <- filter(hyb_sensitive, grepl("MJ", Sample))

# One annotation row (miRNA, MIMAT, mRNA) per distinct hybrid; the first
# occurrence of each hybrid is the one kept.
ipc_match_list <- ipc_hybrids %>%
  select(miRNA, MIMAT, mRNA, hybrids) %>%
  distinct(hybrids, .keep_all = TRUE)
sk_match_list <- sk_hybrids %>%
  select(miRNA, MIMAT, mRNA, hybrids) %>%
  distinct(hybrids, .keep_all = TRUE)
mj_match_list <- mj_hybrids %>%
  select(miRNA, MIMAT, mRNA, hybrids) %>%
  distinct(hybrids, .keep_all = TRUE)

```


# Count hybrids, add to match list
```{r}
# Count how many rows each hybrid has per sensitive cell line (column `n`) and
# attach that count to the matching annotation row. merge() performs an inner
# join on `hybrids` and keeps every column of both tables; it re-sorts on the
# key, so `sort = TRUE` only affects the standalone *_hyb tables.
IPC_hyb <- ipc_hybrids %>%
  group_by(hybrids) %>%
  tally(sort = TRUE)
ipc_match_list <- merge(ipc_match_list, IPC_hyb, by = "hybrids")

SK_hyb <- sk_hybrids %>%
  group_by(hybrids) %>%
  tally(sort = TRUE)
sk_match_list <- merge(sk_match_list, SK_hyb, by = "hybrids")

MJ_hyb <- mj_hybrids %>%
  group_by(hybrids) %>%
  tally(sort = TRUE)
mj_match_list <- merge(mj_match_list, MJ_hyb, by = "hybrids")
```


# Count small RNAseq data, add to match list
```{r}
# Attach the summed small RNA-seq count of each miRNA (joined on `MIMAT`) to
# the sensitive match lists. The sum column is selected by name -- the same
# name the rename below relies on -- so a shifted column layout in
# smallRNA_sum raises an error instead of silently picking another column.
# merge() is an inner join: hybrids whose MIMAT is missing from smallRNA_sum
# are dropped.
IPC_smallRNA <- smallRNA_sum[, c("IPC_sum", "MIMAT")]
ipc_match_list <- merge(ipc_match_list, IPC_smallRNA, by = "MIMAT")
names(ipc_match_list)[names(ipc_match_list) == "IPC_sum"] <- "miRNA_count"
names(ipc_match_list)[names(ipc_match_list) == "n"] <- "hybrid_count"

sk_smallRNA <- smallRNA_sum[, c("SK_sum", "MIMAT")]
sk_match_list <- merge(sk_match_list, sk_smallRNA, by = "MIMAT")
names(sk_match_list)[names(sk_match_list) == "SK_sum"] <- "miRNA_count"
names(sk_match_list)[names(sk_match_list) == "n"] <- "hybrid_count"

mj_smallRNA <- smallRNA_sum[, c("MJ_sum", "MIMAT")]
mj_match_list <- merge(mj_match_list, mj_smallRNA, by = "MIMAT")
names(mj_match_list)[names(mj_match_list) == "MJ_sum"] <- "miRNA_count"
names(mj_match_list)[names(mj_match_list) == "n"] <- "hybrid_count"
```


# Count total RNAseq data, add to match list
```{r}
# Attach the summed total RNA-seq count of each target (joined on `mRNA`) to
# the sensitive match lists, label cell line and treatment, and reorder.
# The first two columns of totalRNA_sum are its identifier columns (one of
# them is `mRNA`); the sum column is selected by name.
# Final layout, assuming the match list arrives as MIMAT, hybrids, miRNA, mRNA,
# hybrid_count, miRNA_count from the previous steps:
#   cell_line, treatment, hybrids, mRNA, <other id column of totalRNA_sum>,
#   miRNA, MIMAT, hybrid_count, mRNA_count, miRNA_count
IPC_totalRNA <- totalRNA_sum[, c(names(totalRNA_sum)[1:2], "IPC_sum")]
ipc_match_list <- merge(ipc_match_list, IPC_totalRNA, by = "mRNA")
names(ipc_match_list)[names(ipc_match_list) == "IPC_sum"] <- "mRNA_count"
ipc_match_list$cell_line <- "IPC"
ipc_match_list$treatment <- "sensitive"
ipc_match_list <- ipc_match_list[, c(9, 10, 3, 1, 7, 4, 2, 5, 8, 6)]
#saveRDS(ipc_match_list, "IPC_data.RDS")

sk_totalRNA <- totalRNA_sum[, c(names(totalRNA_sum)[1:2], "SK_sum")]
sk_match_list <- merge(sk_match_list, sk_totalRNA, by = "mRNA")
names(sk_match_list)[names(sk_match_list) == "SK_sum"] <- "mRNA_count"
sk_match_list$cell_line <- "SK"
sk_match_list$treatment <- "sensitive"
sk_match_list <- sk_match_list[, c(9, 10, 3, 1, 7, 4, 2, 5, 8, 6)]
#saveRDS(sk_match_list, "SK_data.RDS")

mj_totalRNA <- totalRNA_sum[, c(names(totalRNA_sum)[1:2], "MJ_sum")]
mj_match_list <- merge(mj_match_list, mj_totalRNA, by = "mRNA")
names(mj_match_list)[names(mj_match_list) == "MJ_sum"] <- "mRNA_count"
mj_match_list$cell_line <- "MJ"
mj_match_list$treatment <- "sensitive"
mj_match_list <- mj_match_list[, c(9, 10, 3, 1, 7, 4, 2, 5, 8, 6)]
#saveRDS(mj_match_list, "MJ_data.RDS")
```


# Repeat the above steps, using resistant cell line data


# Create a generalized match list that will merge all datasets eventually
```{r}
# Split the resistant-line hybrids by cell line (label found in `Sample`).
ipc_R_hybrids <- filter(hyb_resistant, grepl("IPC", Sample))
sk_R_hybrids <- filter(hyb_resistant, grepl("SK", Sample))

# One annotation row (miRNA, MIMAT, mRNA) per distinct hybrid; the first
# occurrence of each hybrid is the one kept.
ipc_R_match_list <- ipc_R_hybrids %>%
  select(miRNA, MIMAT, mRNA, hybrids) %>%
  distinct(hybrids, .keep_all = TRUE)
sk_R_match_list <- sk_R_hybrids %>%
  select(miRNA, MIMAT, mRNA, hybrids) %>%
  distinct(hybrids, .keep_all = TRUE)
```



# Count hybrids, add to match list
```{r}
# Count how many rows each hybrid has per resistant cell line (column `n`) and
# attach that count to the matching annotation row. merge() performs an inner
# join on `hybrids` and keeps every column of both tables.
IPC_R_hyb <- ipc_R_hybrids %>%
  group_by(hybrids) %>%
  tally(sort = TRUE)

SK_R_hyb <- sk_R_hybrids %>%
  group_by(hybrids) %>%
  tally(sort = TRUE)

ipc_R_match_list <- merge(ipc_R_match_list, IPC_R_hyb, by = "hybrids")
sk_R_match_list <- merge(sk_R_match_list, SK_R_hyb, by = "hybrids")
```


# Count small RNAseq data, add to match list
```{r}
# Attach the summed small RNA-seq count of each miRNA (joined on `MIMAT`) to
# the resistant match lists. The sum column is selected by name -- the same
# name the rename below relies on -- so a shifted column layout in
# smallRNA_sum raises an error instead of silently picking another column.
# merge() is an inner join: hybrids whose MIMAT is missing from smallRNA_sum
# are dropped.
IPC_R_smallRNA <- smallRNA_sum[, c("IPC_R_sum", "MIMAT")]
ipc_R_match_list <- merge(ipc_R_match_list, IPC_R_smallRNA, by = "MIMAT")
names(ipc_R_match_list)[names(ipc_R_match_list) == "IPC_R_sum"] <- "miRNA_count"
names(ipc_R_match_list)[names(ipc_R_match_list) == "n"] <- "hybrid_count"
#saveRDS(ipc_R_match_list, "IPC_R_data.RDS")

sk_R_smallRNA <- smallRNA_sum[, c("SK_R_sum", "MIMAT")]
sk_R_match_list <- merge(sk_R_match_list, sk_R_smallRNA, by = "MIMAT")
names(sk_R_match_list)[names(sk_R_match_list) == "SK_R_sum"] <- "miRNA_count"
names(sk_R_match_list)[names(sk_R_match_list) == "n"] <- "hybrid_count"
#saveRDS(sk_R_match_list, "SK_R_data.RDS")
```

# Count total RNAseq data, add to match list
```{r}

# Attach the summed total RNA-seq count of each target (joined on `mRNA`) to
# the resistant match lists, label cell line and treatment, and reorder.
# The first two columns of totalRNA_sum are its identifier columns (one of
# them is `mRNA`); the sum column is selected by name.
# Final layout, assuming the match list arrives as MIMAT, hybrids, miRNA, mRNA,
# hybrid_count, miRNA_count from the previous steps:
#   cell_line, treatment, hybrids, mRNA, <other id column of totalRNA_sum>,
#   miRNA, MIMAT, hybrid_count, mRNA_count, miRNA_count
IPC_R_totalRNA <- totalRNA_sum[, c(names(totalRNA_sum)[1:2], "IPC_R_sum")]
ipc_R_match_list <- merge(ipc_R_match_list, IPC_R_totalRNA, by = "mRNA")
names(ipc_R_match_list)[names(ipc_R_match_list) == "IPC_R_sum"] <- "mRNA_count"
ipc_R_match_list$cell_line <- "IPC_R"
ipc_R_match_list$treatment <- "resistant"
ipc_R_match_list <- ipc_R_match_list[, c(9, 10, 3, 1, 7, 4, 2, 5, 8, 6)]
#saveRDS(ipc_R_match_list, "IPC_R_data.RDS")

sk_R_totalRNA <- totalRNA_sum[, c(names(totalRNA_sum)[1:2], "SK_R_sum")]
sk_R_match_list <- merge(sk_R_match_list, sk_R_totalRNA, by = "mRNA")
names(sk_R_match_list)[names(sk_R_match_list) == "SK_R_sum"] <- "mRNA_count"
sk_R_match_list$cell_line <- "SK_R"
sk_R_match_list$treatment <- "resistant"
sk_R_match_list <- sk_R_match_list[, c(9, 10, 3, 1, 7, 4, 2, 5, 8, 6)]
#saveRDS(sk_R_match_list, "SK_R_data.RDS")
```


# Merge all dataframes to create a singular count table of all cell lines and treatments
```{r}
# Earlier variant that re-read the per-line tables from disk (kept for
# reference):
#SK_R_data <- readRDS("SK_R_data.rds")
#SK_data <- readRDS("SK_data.rds")
#IPC_R_data <- readRDS("IPC_R_data.rds")
#IPC_data <- readRDS("IPC_data.rds")
#MJ_data <- readRDS("MJ_data.RDS")
#count_data <- rbind(SK_R_data, SK_data, IPC_R_data, IPC_data, MJ_data)

# Stack the five per-cell-line tables (identical columns) into one table.
count_data <- do.call(
  rbind,
  list(
    sk_R_match_list,
    sk_match_list,
    ipc_R_match_list,
    ipc_match_list,
    mj_match_list
  )
)

#saveRDS(count_data, "total_count_data.rds")
```

# Create a count of whether hybrids are seen in triplicates, duplicates, or as unique hybrids
```{r}
# Stack the sensitive and resistant hybrid tables, tagging each row with its
# condition.
qCLASH <- bind_rows(
  readRDS(here::here("initial_data/hyb_sensitive.rds")) %>%
    mutate(condition = "sensitive"),
  readRDS(here::here("initial_data/hyb_resistant.rds")) %>%
    mutate(condition = "resistant")
)

# For every (condition, cell_line, hybrid) record the set of samples it was
# seen in, classify it by the number of distinct samples (3 = "Triplicates",
# 2 = "Duplicates", anything else = "Unique"), and attach the count table.
# A grouped mutate() followed by distinct() replaces the previous summarise()
# call, which returned several rows per group (deprecated in dplyr >= 1.1.0);
# the resulting columns and their order are unchanged.
# NOTE(review): the join uses `cell_line` from the hybrid tables; confirm those
# labels follow the "IPC" / "IPC_R" / "SK" / "SK_R" / "MJ" scheme of
# count_data, otherwise rows of count_data come back with NA replicate info.
counts_replicates <- qCLASH %>%
  group_by(condition, cell_line, hybrids) %>%
  mutate(replicates = list(unique(Sample))) %>%
  ungroup() %>%
  select(condition, cell_line, hybrids, replicates, miRNA, mRNA) %>%
  distinct() %>%
  mutate(
    total = lengths(replicates),
    in_triplicates = factor(
      case_when(
        total == 3 ~ "Triplicates",
        total == 2 ~ "Duplicates",
        TRUE ~ "Unique"
      ),
      levels = c("Unique", "Duplicates", "Triplicates")
    )
  ) %>%
  arrange(condition, cell_line, hybrids, in_triplicates) %>%
  right_join(count_data, by = c("cell_line", "hybrids", "mRNA", "miRNA"))


```

# Create a 3D Representation of the log Value of Counts
```{r}
# Interactive 3D scatter of the natural-log counts (mRNA, hybrid, miRNA),
# one colour per cell line, saved as a standalone HTML widget.
cell_line_palette <- c(
  'dodgerblue4', 'darkslategray4', 'darkgreen', 'firebrick', 'darksalmon'
)
axis_titles <- list(
  xaxis = list(title = 'Log(mRNA Count)'),
  yaxis = list(title = 'Log(Hybrid Count)'),
  zaxis = list(title = 'Log(miRNA Count)')
)

fig <- plot_ly(
  count_data,
  x = ~log(mRNA_count),
  y = ~log(hybrid_count),
  z = ~log(miRNA_count),
  color = ~cell_line,
  colors = cell_line_palette
) %>%
  add_markers() %>%
  layout(scene = axis_titles, title = 'Correlation Analysis')

fig
htmlwidgets::saveWidget(
  plotly::as_widget(fig),
  "./results/qCLASH_tot_RNAseq_spatial_representation.html"
)
```



```{r}
# Same 3D log-count scatter, coloured by replicate class (unique / duplicates /
# triplicates) with the hybrid name as hover text; saved as an HTML widget.
p <- plotly::plot_ly(
  counts_replicates,
  x = ~log(mRNA_count),
  y = ~log(hybrid_count),
  z = ~log(miRNA_count),
  size = 1,
  color = ~in_triplicates,
  text = ~hybrids,
  type = "scatter3d",
  mode = "markers"
)
p
htmlwidgets::saveWidget(
  plotly::as_widget(p),
  "./results/qCLASH_tot_RNAseq_triplicates.html"
)
```



